bootstrap = function(X,alpha=0.05,nreps=1000) {
  #******************************************************************************
  # Calculates three different types of 100(1-alpha)% bootstrap confidence
  # interval for the population mean of X:
  # Standard        (bootstrap mean +/- z * bootstrap standard deviation);
  # Percentile      (order statistics of the sorted bootstrap means);
  # Bias corrected  (percentile limits shifted using the bias constant z0).
  #
  # Text used is Bryan F J Manly (1998) , Randomization, Bootstrap and Monte Carlo Methods in
  # Biology, 2nd edition, Chapman and Hall.
  #
  # Arguments:
  #   X     = vector of observations (at least one value)
  #   alpha = significance level, strictly between 0 and 1
  #   nreps = number of bootstrap resamples (at least 1)
  # Value:
  #   list with elements standard, percentile and bias, each c(lower, upper).
  #******************************************************************************
  stopifnot(length(X) >= 1,
            length(alpha) == 1, alpha > 0, alpha < 1,
            length(nreps) == 1, nreps >= 1)
  lenX = length(X)
  #
  # Generate the bootstrap samples and keep only their means. Resampling is
  # done on the indices because sample(X, ...) would draw from 1:X when X
  # holds a single number.
  #
  Xmean = vapply(seq_len(nreps),
                 function(j) mean(X[sample.int(lenX, lenX, replace=TRUE)]),
                 numeric(1))
  boot.mean = mean(Xmean)
  boot.stan = sd(Xmean)
  #
  # standard confidence interval
  #
  zalpha = qnorm(1-(alpha/2))
  upper.stan = boot.mean + zalpha*boot.stan
  lower.stan = boot.mean - zalpha*boot.stan
  #
  # percentile confidence interval
  # I have left out the standard 0.5 correction for percentiles as this
  # correction shouldn't be needed as nreps gets big
  #
  sort.mean = sort(Xmean)
  # Order statistic at cumulative proportion p. The rank is clamped to
  # 1..nreps because a rank of 0 would silently return an empty vector.
  pick = function(p) sort.mean[min(max(round(p*nreps), 1), nreps)]
  lower.per = pick(alpha/2)
  upper.per = pick(1-(alpha/2))
  #
  # Bias-corrected method
  #
  # Find proportion of time that bootstrap mean exceeds sample mean
  #
  p.exceed = sum(Xmean - mean(X) > 0)/nreps
  #
  # calculate value of standard normal (z0) that is exceeded with prob p,
  # i.e. the upper-tail quantile qnorm(1 - p)
  #
  z0 = qnorm(p.exceed, lower.tail=FALSE)
  upper.bias = pick(pnorm(2*z0 + zalpha))
  lower.bias = pick(pnorm(2*z0 - zalpha))
  list(standard=c(lower.stan,upper.stan),percentile=c(lower.per,upper.per),bias=c(lower.bias,upper.bias))
}



library(mgcv)

slotter = function(band, zlong) {
#**************************************************************************
# Slots the values of ZLONG into the non-missing cells of the matrix BAND.
# Cells are filled row by row (all columns of row 1, then row 2, ...), so
# zlong[1] goes into the first non-NA cell met in that order. Cells that
# are NA in BAND are left as NA.
#
# band  = matrix whose non-NA cells mark where values are wanted
# zlong = vector of values, one per non-NA cell, in row-by-row order
#
# Returns BAND with its non-NA cells overwritten.
#**************************************************************************
  cells = which(!is.na(band), arr.ind=TRUE)
  # Nothing to fill (all NA, or a matrix with no rows/columns)
  if (nrow(cells) == 0) return(band)
  # which() reports cells column by column; re-order to row by row
  cells = cells[order(cells[, 1], cells[, 2]), , drop=FALSE]
  band[cells] = zlong[seq_len(nrow(cells))]
  band
}


dis.two.pts = function(lat,lon) {
  #***************************************************************************
  # Dave Maxwell distance (geodesic) in metres between the two locations
  # Banerjee, Carlin and Gelfand (2004) Hierarchical Modeling and Analysis for
  # Spatial Data eqn 1.10.
  # lat and lon need to be vectors of length 2, in decimal degrees:
  # lat = c(latitude of point 1, latitude of point 2)
  # lon = c(longitude of point 1, longitude of point 2)
  # Returns a single number, the distance in metres on a sphere of radius R.
  #***************************************************************************
  
  R = 6371 # radius of Earth approx 6371km
  
  # degrees to radians
  theta = 2*pi*lat / 360
  lambda = 2*pi*lon / 360
  
  # cosine of the angle subtended at the centre of the sphere
  cos.angle = sin(theta[1])*sin(theta[2]) + cos(theta[1])*cos(theta[2])*cos(lambda[1]-lambda[2])
  # Rounding error can push this fractionally outside [-1, 1] (e.g. for two
  # identical points), which would make acos() return NaN
  cos.angle = min(max(cos.angle, -1), 1)
  
  1000 * R * acos(cos.angle)
}


bander.setup = function(xlimit, ylimit, nx, latt, longg, minsep) {
  #*******************************************************************
  # Sets up the grid for later modelling. It outputs the marginal
  # X and Y values, the matrix of TRUE and NA values which shows where
  # prediction can occur, and vectors of X and Y values that are where
  # prediction needs to happen. These can be slotted into the matrix
  # later for the image() plot.
  #
  # Initially, creates a grid of x,y co-ordinates where nx determines
  # the number of grid values in the X direction. The values of xlimit
  # and ylimit determine the first and last values. ny is chosen from
  # the x bin size so that the y-variable is binned similarly.
  # Then sets those grid points that are less than or equal to MINSEP
  # away from at least one existing data point to TRUE. Distances in
  # METRES are calculated with dis.two.pts().
  #
  # Arguments:
  #   xlimit, ylimit = c(first, last) grid values (longitude, latitude)
  #   nx             = number of grid values in the X direction
  #   latt, longg    = latitudes and longitudes of the data points
  #   minsep         = maximum distance (metres) to the nearest data point
  # Value: list with
  #   band         = nx by ny matrix, TRUE where prediction can occur, else NA
  #   xgrid, ygrid = marginal grid values
  #   xlong, ylong = co-ordinates of the TRUE cells, row by row of band
  #                  (zero length when no cell qualifies)
  #
  # Example arguments:
  #   xlimit=c(-10,4); ylimit=c(48,62); nx=20
  #   latt=lat.small.pres; longg=long.small.pres; minsep=20000
  #*******************************************************************
  
  size = (xlimit[2] - xlimit[1]) / nx
  ny = round((ylimit[2] - ylimit[1]) / size)
  xgrid = seq(xlimit[1], xlimit[2], length.out=nx)
  ygrid = seq(ylimit[1], ylimit[2], length.out=ny)
  ncells = nx * ny
  xlong = rep(0, ncells)
  ylong = rep(0, ncells)
  
  nlat = length(latt)
  band = matrix(NA, nrow=nx, ncol=ny)
  cc = 0
  
  for (j in seq_len(nx)) {
    xx = xgrid[j]
    for (k in seq_len(ny)) {
      yy = ygrid[k]
      # distance from this grid point to every data point
      dd = vapply(seq_len(nlat),
                  function(m) dis.two.pts(c(yy, latt[m]), c(xx, longg[m])),
                  numeric(1))
      # any() is TRUE as soon as one data point is close enough, even if
      # some other distance could not be computed
      if (any(dd <= minsep)) {
        cc = cc + 1
        xlong[cc] = xx
        ylong[cc] = yy
        band[j,k] = TRUE
      }
    }
  }
  # seq_len() so that no qualifying cells gives empty vectors; 1:cc would
  # be c(1, 0) and keep a spurious first element
  xlong = xlong[seq_len(cc)]
  ylong = ylong[seq_len(cc)]
  list(band=band, xgrid=xgrid, ygrid=ygrid, xlong=xlong, ylong=ylong)
}


simpson.jb = function(X, species='cols') {
  ####********************************************************
  #### Simpson index per station: 1 - sum(p^2), where p is the
  #### proportion of the station total made up by each species.
  #### X       = stations by species table of counts
  #### species = 'cols' if species are the columns of X,
  ####           'rows' if they are the rows (X is transposed)
  #### Stations whose total is zero are returned as NA.
  ####********************************************************
  
  if (species=='rows') X=t(X)
  
  totals = rowSums(X)
  out = rep(NA, dim(X)[1])
  
  # only stations with a non-zero total have defined proportions
  for (i in seq_along(totals)[totals!=0]) {
    props = X[i,] / totals[i]
    out[i] = 1 - sum(props * props)
  }
  out
}


shannon.jb = function(X, species='cols') {
  ####********************************************************
  #### Shannon index per station: -sum(p * log(p)) over the
  #### species with non-zero proportion p of the station total.
  #### X       = stations by species table of counts
  #### species = 'cols' if species are the columns of X,
  ####           'rows' if they are the rows (X is transposed)
  #### Stations whose total is not positive, or is missing,
  #### are returned as NA.
  ####********************************************************
  
  if (species=='rows') X=t(X)
  X = as.matrix(X)
  
  stat.sums = rowSums(X)
  shannon = rep(NA, dim(X)[1])
  # a missing total must not reach the arithmetic below
  usable = !is.na(stat.sums) & stat.sums > 0
  
  if (any(usable)) {
    # each usable row divided by its own total
    P = X[usable, , drop=FALSE] / stat.sums[usable]
    terms = P * log(P)
    # 0 * log(0) is NaN in R; absent species contribute nothing
    terms[P == 0] = 0
    shannon[usable] = -rowSums(terms)
  }
  shannon
}


richness.jb =  function(X, species='cols') {
  ####********************************************************
  #### Species richness per station: the number of species
  #### with a count greater than zero.
  #### X       = stations by species table of counts
  #### species = 'cols' if species are the columns of X,
  ####           'rows' if they are the rows (X is transposed)
  #### Returns an integer vector with one value per station
  #### (zero length when X has no stations).
  ####********************************************************
  
  if (species=='rows') X=t(X)
  
  # rowSums() counts the TRUE cells per station in one pass and copes with
  # a table that has no rows; as.integer() drops the station names
  as.integer(rowSums(X > 0))
}


abundance.jb =  function(X, species='cols') {
  ####********************************************************
  #### Abundance per station: the total count over all species.
  #### X       = stations by species table of counts
  #### species = 'cols' if species are the columns of X,
  ####           'rows' if they are the rows (X is transposed)
  #### Returns a plain vector without station names.
  ####********************************************************
  
  if (species=='rows') X=t(X)
  
  station.totals = rowSums(X)
  unname(station.totals)
}



ss.species = function(X, clusters, species.names, species='cols') {
  #*****************************************************************
  # Allocates species contribution in terms of sum of within SS over
  # ALL clusters.
  # X = stations (rows) by species (columns) matrix
  # clusters = cluster allocation per station (labels 1, 2, ..., max)
  # species.names = name of each species, in the column order of X
  # species = 'rows' if X is supplied as species by stations
  #
  # Returns a list, ordered from the largest contribution down:
  #   species = species names
  #   ss      = within SS summed over clusters
  #   percent = ss as a percentage of the total over all species
  #*****************************************************************
  
  if (species=='rows') X=t(X)
  
  nspecies = dim(X)[2]
  nclusters = max(clusters)
  ss = rep(0, nspecies)
  
  for (k in seq_len(nclusters)) {
    # drop=FALSE keeps a matrix even when the cluster holds one station
    # (or X has one species), so the column indexing below still works
    counts.use = X[clusters==k, , drop=FALSE]
    
    for (j in seq_len(nspecies)) {
      ss[j] = ss[j] + sum((counts.use[,j] - mean(counts.use[,j]))^2)
    }
  }
  
  order.ss = order(ss, decreasing=TRUE)
  species.ss = species.names[order.ss]
  sort.ss = ss[order.ss]
  perc.ss = 100 * sort.ss / sum(ss)
  
  list(species = species.ss, ss = sort.ss, percent = perc.ss)
}



ss.species.cluster = function(X, clusters, number, species.names, species='cols') {
  #*****************************************************************
  # Allocates species contribution in terms of within SS for a
  # named CLUSTER (see ss.species for summation over all clusters).
  # X = stations (rows) by species (columns) matrix
  # clusters = cluster allocation per station
  # number = cluster that you want an answer for
  # species.names = name of each species, in the column order of X
  # species = 'rows' if X is supplied as species by stations
  #
  # Returns a list, ordered from the largest contribution down:
  #   species = species names
  #   ss      = within SS for the chosen cluster
  #   percent = ss as a percentage of the total over all species
  #             (NaN when every ss is zero, e.g. a one-station cluster)
  #*****************************************************************
  
  if (species=='rows') X=t(X)
  
  nspecies = dim(X)[2]
  
  ss = rep(0, nspecies)
  # drop=FALSE keeps a matrix even when the cluster holds one station
  # (or X has one species), so the column indexing below still works
  counts.use = X[clusters==number, , drop=FALSE]
  
  for (j in seq_len(nspecies)) {
    ss[j] = sum((counts.use[,j] - mean(counts.use[,j]))^2)
  }
  
  order.ss = order(ss, decreasing=TRUE)
  species.ss = species.names[order.ss]
  sort.ss = ss[order.ss]
  perc.ss = 100 * sort.ss / sum(ss)
  
  list(species = species.ss, ss = sort.ss, percent = perc.ss)
}

####
#### Note that the RATIOS and WITHIN functions assess the same thing
#### from opposite directions: RATIOS plots the between SS as a
#### proportion of the total SS, WITHIN plots the total within SS.
####


ratios = function(counts, numbers, nstart=25) {
  #*******************************************************************
  # Plots the between to total SS ratio for a range of cluster numbers.
  # This is plotting the proportion of variation explained. Kind of the
  # inverse of plotting the within ss.
  #
  # counts  = data passed to kmeans()
  # numbers = vector of cluster numbers to try
  # nstart  = number of random starts used by kmeans() for each fit
  #
  # Draws the plot and returns list(numbers, ratio), with one ratio per
  # entry of numbers.
  #*******************************************************************
  ratio = vapply(numbers, function(k) {
    res = kmeans(counts, k, nstart=nstart)
    # full component name: 'res$between' only worked through partial
    # matching of list names by '$'
    res$betweenss / res$totss
  }, numeric(1), USE.NAMES=FALSE)
  
  plot(numbers, ratio, xlab="No of clusters", ylab="Variance explained")
  list(numbers=numbers, ratio=ratio)
}



within = function(counts, numbers, nstart=25) {
  #*******************************************************************
  # Plots the total within group SS for a range of cluster numbers.
  #
  # counts  = data passed to kmeans()
  # numbers = vector of cluster numbers to try
  # nstart  = number of random starts used by kmeans() for each fit
  #
  # Draws the plot and returns list(numbers, within), with one within
  # SS per entry of numbers.
  #*******************************************************************
  fit.within.ss = function(k) {
    kmeans(counts, k, nstart=nstart)$tot.withinss
  }
  within.ss = vapply(numbers, fit.within.ss, numeric(1), USE.NAMES=FALSE)
  
  plot(numbers, within.ss, xlab="No of clusters", ylab="Within SS")
  list(numbers=numbers, within=within.ss)
}


